Here, we will reanalyse the 3 cases of CLL that we published in our previous study to check:
This will give us a negative control to ensure our observations are specific or not to Richter transformation.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(ggpubr)
library(clusterProfiler)
library(org.Hs.eg.db)
library(tidyverse)
path_to_obj <- here::here("results/R_objects/cll_seurat_annotated.rds")
path_to_save <- here::here("results/R_objects/cll_seurat_annotated_list.rds")
# Thresholds
max_pct_mt <- 22.5
min_n_features <- 600
# Source functions
source(here::here("bin/utils.R"))
seurat <- readRDS(path_to_obj)
colnames(seurat@meta.data)[colnames(seurat@meta.data) == "cell_type"] <- "annotation"
DimPlot(seurat, group.by = "annotation")
# Exclude microenvironment
selected_cells_1 <- seurat$annotation %in% c("CLL 1892", "CLL 1472", "CLL 1220")
seurat <- subset(seurat, cells = colnames(seurat)[selected_cells_1])
DimPlot(seurat)
# Keep only cells stored at RT or 4C for <= 2h
selected_cells_2 <-
(seurat$time %in% c("0h", "2h") & seurat$temperature == "RT") |
(seurat$temperature == "4C")
seurat <- subset(seurat, cells = colnames(seurat)[selected_cells_2])
DimPlot(seurat, group.by = "temperature")
table(seurat$temperature, seurat$time)
##
## 0h 2h 4h 6h 8h 24h
## 4C 3863 1264 1274 996 1507 1387
## RT 3893 3502 0 0 0 0
# Apply more stringent thresholds (% mitochondrial expression)
FeatureScatter(seurat, "nFeature_RNA", "percent_mt")
hist(seurat$percent_mt, 100)
seurat <- subset(
seurat,
subset = percent_mt < max_pct_mt & nFeature_RNA > min_n_features
)
# Split by donor and reprocess
seurat_list <- SplitObject(seurat, split.by = "donor")
donors <- c("1220", "1472", "1892")
seurat_list <- seurat_list[donors]
seurat_list <- purrr::map(donors, function(x) {
seurat_obj <- seurat_list[[x]]
seurat_obj <- seurat_obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA()
if (x == "1220") {
seurat_obj <- seurat_obj %>%
RunUMAP(reduction = "pca", dims = 1:20)
} else if (x %in% c("1472", "1892")) {
seurat_obj <- seurat_obj %>%
RunHarmony(
reduction = "pca",
dims = 1:20,
group.by.vars = "temperature"
) %>%
RunUMAP(reduction = "harmony", dims = 1:20)
}
seurat_obj
})
names(seurat_list) <- donors
feat_plots_1 <- purrr::map(seurat_list, function(seurat_obj) {
ps <- purrr::map(c("CXCR4", "CD27", "MIR155HG"), function(x) {
p <- FeaturePlot(seurat_obj, features = x, pt.size = 0.65) +
scale_color_viridis_c(option = "magma")
})
ggarrange(plotlist = ps, ncol = 3, common.legend = TRUE)
})
feat_plots_1
## $`1220`
##
## $`1472`
##
## $`1892`
Overall, we can see how the main driver of variance is again the exposure to the lymph node microenvironment lymph. In the 3 donors, we see 3 main regions, with mutually exclusive expression of CXCR4, CD27 and MIR155HG.
In addition, we see that for 1892 we can identify a minor subclon:
resolutions <- c(0.2, 0.2)
seurat_list[c("1472", "1892")] <- purrr::map2(
seurat_list[c("1472", "1892")],
resolutions,
function(seurat_obj, res) {
seurat_obj <- FindNeighbors(
seurat_obj,
reduction = "harmony",
dims = 1:20
)
seurat_obj <- FindClusters(seurat_obj, resolution = res)
seurat_obj
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4975
## Number of edges: 138155
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8073
## Number of communities: 2
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9116
## Number of edges: 254574
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8644
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(seurat_list$`1472`)
DimPlot(seurat_list$`1892`)
clusters_of_interest <- list(
"1472" = c("1"),
"1892" = c("3", "4")
)
# Markers
markers_all <- purrr::map(
seurat_list[c("1472", "1892")],
FindAllMarkers,
only.pos = FALSE,
logfc.threshold = 0.1
)
markers_all <- purrr::map(names(markers_all), function(x) {
df <- markers_all[[x]]
df <- df %>%
dplyr::filter(cluster %in% clusters_of_interest[[x]], p_val_adj < 0.001) %>%
group_by(cluster) %>%
dplyr::arrange(desc(avg_log2FC), .by_group = TRUE) %>%
ungroup()
df
})
names(markers_all) <- names(clusters_of_interest)
DT::datatable(markers_all$`1472`)
DT::datatable(markers_all$`1892`)
markers_of_interest <- c("PCDH9", "TTN", "FCRL5", "BCL11A", "ATM", "CHD2",
"OGA", "OGT", "PAX5", "DDX17", "BACH2", "BCL2",
"BIRC3", "IL4R", "KMT2A", "GLUL", "S100A10")
markers_of_interest_gg <- purrr::map(markers_of_interest, function(x) {
ps <- purrr::map(names(seurat_list), function(donor) {
p <- FeaturePlot(seurat_list[[donor]], features = x, pt.size = 0.6) +
scale_color_viridis_c(option = "inferno") +
ggtitle(str_c(donor, x, sep = "_")) +
theme(plot.title = element_text(size = 13, hjust = 0.5))
})
out <- ggarrange(plotlist = ps, ncol = 3, common.legend = TRUE)
})
names(markers_of_interest_gg) <- markers_of_interest
markers_of_interest_gg
## $PCDH9
##
## $TTN
##
## $FCRL5
##
## $BCL11A
##
## $ATM
##
## $CHD2
##
## $OGA
##
## $OGT
##
## $PAX5
##
## $DDX17
##
## $BACH2
##
## $BCL2
##
## $BIRC3
##
## $IL4R
##
## $KMT2A
##
## $GLUL
##
## $S100A10
To shed light into the identity of these novel clusters, let us perform a GSEA with clusterProfiler.
gene_lists <- list(
"1472" = markers_all$`1472`[markers_all$`1472`$cluster == "1", "avg_log2FC", drop = TRUE],
"1892" = markers_all$`1892`[markers_all$`1892`$cluster == "4", "avg_log2FC", drop = TRUE]
)
names(gene_lists$`1472`) <- markers_all$`1472`$gene[markers_all$`1472`$cluster == "1"]
names(gene_lists$`1892`) <- markers_all$`1892`$gene[markers_all$`1892`$cluster == "4"]
gsea_pcdh9_clusters <- purrr::map(gene_lists, function(x) {
gsea_results <- gseGO(
x,
ont = "BP",
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
minGSSize = 30,
maxGSSize = 300,
seed = TRUE
)
gsea_results <- clusterProfiler::simplify(gsea_results, cutoff = 0.35)
gsea_results@result <- gsea_results@result %>%
arrange(desc(NES))
gsea_results
})
DT::datatable(gsea_pcdh9_clusters$`1472`@result)
DT::datatable(gsea_pcdh9_clusters$`1892`@result)
oxphos_gg <- purrr::map(gsea_pcdh9_clusters, function(obj) {
p <- gseaplot(
obj,
geneSetID = "GO:0006119",
by = "runningScore",
title = "oxidative phosphorylation"
)
p
})
oxphos_gg
## $`1472`
##
## $`1892`
bcr_gg <- purrr::map(gsea_pcdh9_clusters, function(obj) {
p <- gseaplot(
obj,
geneSetID = "GO:0050853",
by = "runningScore",
title = "BCR signaling"
)
p
})
bcr_gg
## $`1472`
##
## $`1892`
signatures <- list(
b_cell_signaling = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0050853`,
oxphos = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0006119`,
mitochondrial_translation = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0032543`
)
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
seurat_obj <- AddModuleScore(
seurat_obj,
features = signatures,
name = names(signatures)
)
seurat_obj
})
signatures_pots <- purrr::map(
c("b_cell_signaling1", "oxphos2", "mitochondrial_translation3"),
function(x) {
p1 <- FeaturePlot(seurat_list$`1220`, x, pt.size = 0.7) +
scale_color_viridis_c(option = "magma")
p2 <- FeaturePlot(seurat_list$`1472`, x, pt.size = 0.7) +
scale_color_viridis_c(option = "magma")
p3 <- FeaturePlot(seurat_list$`1892`, x, pt.size = 0.7) +
scale_color_viridis_c(option = "magma")
out <- ggarrange(
plotlist = list(p1, p2, p3),
ncol = 3,
common.legend = TRUE
)
out
})
signatures_pots
## [[1]]
##
## [[2]]
##
## [[3]]
saveRDS(seurat_list, path_to_save)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.1 org.Hs.eg.db_3.12.0 AnnotationDbi_1.52.0 IRanges_2.24.1 S4Vectors_0.28.1 Biobase_2.50.0 BiocGenerics_0.36.1 clusterProfiler_3.18.1 ggpubr_0.4.0 ggplot2_3.3.3 harmony_1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20 tidyselect_1.1.1 RSQLite_2.2.7 htmlwidgets_1.5.3 grid_4.0.4 BiocParallel_1.24.1 Rtsne_0.15 scatterpie_0.1.6 munsell_0.5.0 codetools_0.2-18 ica_1.0-2 DT_0.18 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-1 GOSemSim_2.16.1 highr_0.9 knitr_1.33 rstudioapi_0.13 ROCR_1.0-11 ggsignif_0.6.2 tensor_1.5 DOSE_3.16.0 listenv_0.8.0 labeling_0.4.2 polyclip_1.10-0 bit64_4.0.5 farver_2.1.0 rprojroot_2.0.2 downloader_0.4 parallelly_1.26.0 vctrs_0.3.8 generics_0.1.0 xfun_0.23 R6_2.5.0 graphlayouts_0.7.1 rsvd_1.0.5 spatstat.utils_2.2-0 cachem_1.0.5 fgsea_1.16.0 assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1 ggraph_2.0.5 enrichplot_1.10.2 gtable_0.3.0 globals_0.14.0 goftest_1.2-2 tidygraph_1.2.0 rlang_0.4.11 splines_4.0.4 rstatix_0.7.0
## [55] lazyeval_0.2.2 spatstat.geom_2.1-0 broom_0.7.7 modelr_0.1.8 BiocManager_1.30.15 yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 crosstalk_1.1.1 backports_1.2.1 httpuv_1.6.1 qvalue_2.22.0 tools_4.0.4 bookdown_0.22 ellipsis_0.3.2 spatstat.core_2.1-2 jquerylib_0.1.4 RColorBrewer_1.1-2 ggridges_0.5.3 plyr_1.8.6 rpart_4.1-15 deldir_0.2-10 viridis_0.6.1 pbapply_1.4-3 cowplot_1.1.1 zoo_1.8-9 haven_2.4.1 ggrepel_0.9.1 cluster_2.1.1 here_1.0.1 fs_1.5.0 magrittr_2.0.1 RSpectra_0.16-0 data.table_1.14.0 scattermore_0.7 DO.db_2.9 openxlsx_4.2.3 reprex_2.0.0 lmtest_0.9-38 RANN_2.6.1 fitdistrplus_1.1-5 matrixStats_0.59.0 hms_1.1.0 patchwork_1.1.1 mime_0.10 evaluate_0.14 xtable_1.8-4 rio_0.5.26 readxl_1.3.1 gridExtra_2.3 compiler_4.0.4 shadowtext_0.0.8 KernSmooth_2.23-18 crayon_1.4.1
## [109] htmltools_0.5.1.1 mgcv_1.8-36 later_1.2.0 lubridate_1.7.10 DBI_1.1.1 tweenr_1.0.2 dbplyr_2.1.1 MASS_7.3-53.1 Matrix_1.3-4 car_3.0-10 cli_2.5.0 igraph_1.2.6 pkgconfig_2.0.3 rvcheck_0.1.8 foreign_0.8-81 plotly_4.9.4 spatstat.sparse_2.0-0 xml2_1.3.2 bslib_0.2.5.1 rvest_1.0.0 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.18 spatstat.data_2.1-0 rmarkdown_2.8 cellranger_1.1.0 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 curl_4.3.1 shiny_1.6.0 lifecycle_1.0.0 nlme_3.1-152 jsonlite_1.7.2 carData_3.0-4 limma_3.46.0 viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.1 lattice_0.20-41 fastmap_1.1.0 httr_1.4.2 survival_3.2-10 GO.db_3.12.1 glue_1.4.2 remotes_2.4.0 zip_2.2.0 png_0.1-7 bit_4.0.4 ggforce_0.3.3 stringi_1.6.2 sass_0.4.0 blob_1.2.1 memoise_2.0.0
## [163] irlba_2.3.3 future.apply_1.7.0